Importing packages

#install.packages("plyr")
#install.packages("dplyr")
#install.packages("tidyr")
#install.packages("tidyverse")
#install.packages("psych")
#install.packages("ggpubr")
#install.packages("ggplot2")
#install.packages("plotly")
#install.packages("moments")
#install.packages('gmodels')

library(plyr) 
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:plyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
library(tidyverse)
## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0     ✔ purrr   0.3.5
## ✔ tibble  3.1.8     ✔ stringr 1.4.1
## ✔ readr   2.1.3     ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::arrange()   masks plyr::arrange()
## ✖ purrr::compact()   masks plyr::compact()
## ✖ dplyr::count()     masks plyr::count()
## ✖ dplyr::failwith()  masks plyr::failwith()
## ✖ dplyr::filter()    masks stats::filter()
## ✖ dplyr::id()        masks plyr::id()
## ✖ dplyr::lag()       masks stats::lag()
## ✖ dplyr::mutate()    masks plyr::mutate()
## ✖ dplyr::rename()    masks plyr::rename()
## ✖ dplyr::summarise() masks plyr::summarise()
## ✖ dplyr::summarize() masks plyr::summarize()
library(psych)
## 
## Attaching package: 'psych'
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
library(ggpubr)
## 
## Attaching package: 'ggpubr'
## 
## The following object is masked from 'package:plyr':
## 
##     mutate
library(ggplot2)
library(plotly)
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following objects are masked from 'package:plyr':
## 
##     arrange, mutate, rename, summarise
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout
library(moments)
library(gmodels)

Import the file bank.csv

# Read the raw bank data from the local CSV export and echo the data frame
bank_unclean <- read.csv(file = "~/Downloads/Archive (1)/bank.csv")
print(bank_unclean)

Data Cleaning

# Sort the raw data by age, oldest first
bank_sort <- bank_unclean[order(-bank_unclean$age), ]
head(bank_sort)
# Drop the "default" column by name rather than by position, so the step
# keeps removing the right column if the CSV column order changes
bank_drop <- dplyr::select(bank_sort, -default)
head(bank_drop)
# Rename column "contact" to "Contact_Info"
names(bank_drop)[names(bank_drop) == "contact"] <- "Contact_Info"
head(bank_drop)
# Keep only rows 5022..6139 of the age-sorted data (a middle slice), taken in
# reverse so the result runs from younger to older. Row indices beyond the
# end of a data frame silently produce all-NA rows, so check the size first.
first_kept_row <- 5022
last_kept_row <- 6139
stopifnot(nrow(bank_drop) >= last_kept_row)
bank <- bank_drop[last_kept_row:first_kept_row, ]

# Upper-case the three marital status labels with gsub(), one label per pass
marital_labels <- c(single = "SINGLE", married = "MARRIED", divorced = "DIVORCED")
bank$marital <- Reduce(
  function(values, from) gsub(from, marital_labels[[from]], values),
  names(marital_labels),
  as.character(bank$marital)
)
head(bank)
str(bank)
## 'data.frame':    1118 obs. of  16 variables:
##  $ age         : int  37 37 37 37 37 37 37 37 37 37 ...
##  $ job         : chr  "management" "technician" "management" "management" ...
##  $ marital     : chr  "SINGLE" "SINGLE" "MARRIED" "MARRIED" ...
##  $ education   : chr  "tertiary" "tertiary" "tertiary" "tertiary" ...
##  $ balance     : int  102 0 156 0 480 443 0 4017 1113 4151 ...
##  $ housing     : chr  "yes" "yes" "no" "no" ...
##  $ loan        : chr  "no" "no" "no" "no" ...
##  $ Contact_Info: chr  "cellular" "cellular" "cellular" "cellular" ...
##  $ day         : int  6 23 19 15 22 29 8 30 2 30 ...
##  $ month       : chr  "may" "jul" "nov" "jan" ...
##  $ duration    : int  445 366 366 426 344 1600 257 665 229 543 ...
##  $ campaign    : int  1 6 3 2 2 1 2 2 1 4 ...
##  $ pdays       : int  258 -1 -1 196 182 -1 97 196 182 -1 ...
##  $ previous    : int  2 0 0 1 8 0 1 1 1 0 ...
##  $ poutcome    : chr  "failure" "unknown" "unknown" "other" ...
##  $ deposit     : chr  "yes" "yes" "yes" "yes" ...
# Mean, min, max and standard deviation of balance within each age.
# across() replaces the superseded summarise_at(); .names = "{.fn}" keeps the
# result columns named Mean, Min, Max and STD. The dplyr:: prefixes keep the
# calls independent of the plyr/dplyr masking order.
bank %>%
  dplyr::group_by(age) %>%
  dplyr::summarise(
    dplyr::across(
      balance,
      list(Mean = mean, Min = min, Max = max, STD = sd),
      .names = "{.fn}"
    )
  )
# Mean, min, max and standard deviation of duration within each campaign value
bank %>%
  dplyr::group_by(campaign) %>%
  dplyr::summarise(
    dplyr::across(
      duration,
      list(Mean = mean, Min = min, Max = max, STD = sd),
      .names = "{.fn}"
    )
  )

Subsets of data

# Rows whose balance is exactly 1
subset(bank, balance == 1)
# Rows with age from 20 to 40 inclusive. %in% tests membership for every row;
# `age == 20:40` recycles 20:40 along the column and compares position by
# position, which drops matching rows and raises a recycling warning.
subset(bank, age %in% 20:40)
# Rows whose campaign value is exactly 10
subset(bank, campaign == 10)

Table For Education and Job

# Two-way frequency table: education (rows) by job (columns)
bank_table <- table(bank[["education"]], bank[["job"]])
print(bank_table)
##            
##             admin. blue-collar entrepreneur housemaid management retired
##   primary        6          78            3         9          5       0
##   secondary    116         106           17        15         28       3
##   tertiary      16           5           21         5        235       1
##   unknown        4          12            1         0          9       0
##            
##             self-employed services student technician unemployed unknown
##   primary               0       11       0          3          3       0
##   secondary            18      107       2        138         24       0
##   tertiary             13        7       4         65         12       3
##   unknown               0        1       4          7          0       1

Frequency Tables For Deposit and Age, Contact info and Marital Status

# Counts of deposit (rows) at each age (columns), printed as a flat table
bank_Ftable2 <- table(bank[["deposit"]], bank[["age"]])
print(ftable(bank_Ftable2))
##       37  38  39  40
##                     
## no     0 209 200 210
## yes  150 144 143  62
# Counts of Contact_Info (rows) by marital status (columns), printed as a flat table
bank_Ftable3 <- table(bank[["Contact_Info"]], bank[["marital"]])
print(ftable(bank_Ftable3))
##            DIVORCED MARRIED SINGLE
##                                   
## cellular         90     487    197
## telephone         3      31     14
## unknown          49     177     70

Cross Tables for Education and Job, Age and Housing

# Cross-tabulate education (rows) against job (columns).
# dnn labels the dimensions in the same order as x and y, so it must be
# c("Education", "Job"); the reversed labels put "Job" on the education rows.
banl_ct1 <- CrossTable(
  x = bank$education,
  y = bank$job,
  dnn = c("Education", "Job")
)
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## | Chi-square contribution |
## |           N / Row Total |
## |           N / Col Total |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  1118 
## 
##  
##              | Job 
##    Education |        admin. |   blue-collar |  entrepreneur |     housemaid |    management |       retired | self-employed |      services |       student |    technician |    unemployed |       unknown |     Row Total | 
## -------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|
##      primary |             6 |            78 |             3 |             9 |             5 |             0 |             0 |            11 |             0 |             3 |             3 |             0 |           118 | 
##              |         5.389 |       151.997 |         0.463 |        11.524 |        20.091 |         0.422 |         3.272 |         0.397 |         1.055 |        16.882 |         0.303 |         0.422 |               | 
##              |         0.051 |         0.661 |         0.025 |         0.076 |         0.042 |         0.000 |         0.000 |         0.093 |         0.000 |         0.025 |         0.025 |         0.000 |         0.106 | 
##              |         0.042 |         0.388 |         0.071 |         0.310 |         0.018 |         0.000 |         0.000 |         0.087 |         0.000 |         0.014 |         0.077 |         0.000 |               | 
##              |         0.005 |         0.070 |         0.003 |         0.008 |         0.004 |         0.000 |         0.000 |         0.010 |         0.000 |         0.003 |         0.003 |         0.000 |               | 
## -------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|
##    secondary |           116 |           106 |            17 |            15 |            28 |             3 |            18 |           107 |             2 |           138 |            24 |             0 |           574 | 
##              |        25.474 |         0.076 |         0.966 |         0.001 |        91.729 |         0.436 |         0.273 |        27.672 |         1.913 |         7.502 |         0.790 |         2.054 |               | 
##              |         0.202 |         0.185 |         0.030 |         0.026 |         0.049 |         0.005 |         0.031 |         0.186 |         0.003 |         0.240 |         0.042 |         0.000 |         0.513 | 
##              |         0.817 |         0.527 |         0.405 |         0.517 |         0.101 |         0.750 |         0.581 |         0.849 |         0.200 |         0.648 |         0.615 |         0.000 |               | 
##              |         0.104 |         0.095 |         0.015 |         0.013 |         0.025 |         0.003 |         0.016 |         0.096 |         0.002 |         0.123 |         0.021 |         0.000 |               | 
## -------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|
##     tertiary |            16 |             5 |            21 |             5 |           235 |             1 |            13 |             7 |             4 |            65 |            12 |             3 |           387 | 
##              |        22.362 |        59.936 |         2.872 |         2.529 |       201.837 |         0.107 |         0.480 |        30.739 |         0.084 |         1.034 |         0.167 |         1.885 |               | 
##              |         0.041 |         0.013 |         0.054 |         0.013 |         0.607 |         0.003 |         0.034 |         0.018 |         0.010 |         0.168 |         0.031 |         0.008 |         0.346 | 
##              |         0.113 |         0.025 |         0.500 |         0.172 |         0.848 |         0.250 |         0.419 |         0.056 |         0.400 |         0.305 |         0.308 |         0.750 |               | 
##              |         0.014 |         0.004 |         0.019 |         0.004 |         0.210 |         0.001 |         0.012 |         0.006 |         0.004 |         0.058 |         0.011 |         0.003 |               | 
## -------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|
##      unknown |             4 |            12 |             1 |             0 |             9 |             0 |             0 |             1 |             4 |             7 |             0 |             1 |            39 | 
##              |         0.184 |         3.549 |         0.148 |         1.012 |         0.045 |         0.140 |         1.081 |         2.623 |        38.216 |         0.025 |         1.360 |         5.306 |               | 
##              |         0.103 |         0.308 |         0.026 |         0.000 |         0.231 |         0.000 |         0.000 |         0.026 |         0.103 |         0.179 |         0.000 |         0.026 |         0.035 | 
##              |         0.028 |         0.060 |         0.024 |         0.000 |         0.032 |         0.000 |         0.000 |         0.008 |         0.400 |         0.033 |         0.000 |         0.250 |               | 
##              |         0.004 |         0.011 |         0.001 |         0.000 |         0.008 |         0.000 |         0.000 |         0.001 |         0.004 |         0.006 |         0.000 |         0.001 |               | 
## -------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|
## Column Total |           142 |           201 |            42 |            29 |           277 |             4 |            31 |           126 |            10 |           213 |            39 |             4 |          1118 | 
##              |         0.127 |         0.180 |         0.038 |         0.026 |         0.248 |         0.004 |         0.028 |         0.113 |         0.009 |         0.191 |         0.035 |         0.004 |               | 
## -------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|---------------|
## 
## 
# Cross-tabulate age (rows) against housing (columns)
banl_ct2 <- CrossTable(
  x = bank$age,
  y = bank$housing,
  dnn = c("Age", "Housing")
)
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## | Chi-square contribution |
## |           N / Row Total |
## |           N / Col Total |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  1118 
## 
##  
##              | Housing 
##          Age |        no |       yes | Row Total | 
## -------------|-----------|-----------|-----------|
##           37 |        84 |        66 |       150 | 
##              |     6.547 |     4.818 |           | 
##              |     0.560 |     0.440 |     0.134 | 
##              |     0.177 |     0.102 |           | 
##              |     0.075 |     0.059 |           | 
## -------------|-----------|-----------|-----------|
##           38 |       147 |       206 |       353 | 
##              |     0.047 |     0.035 |           | 
##              |     0.416 |     0.584 |     0.316 | 
##              |     0.310 |     0.320 |           | 
##              |     0.131 |     0.184 |           | 
## -------------|-----------|-----------|-----------|
##           39 |       139 |       204 |       343 | 
##              |     0.284 |     0.209 |           | 
##              |     0.405 |     0.595 |     0.307 | 
##              |     0.293 |     0.317 |           | 
##              |     0.124 |     0.182 |           | 
## -------------|-----------|-----------|-----------|
##           40 |       104 |       168 |       272 | 
##              |     1.111 |     0.818 |           | 
##              |     0.382 |     0.618 |     0.243 | 
##              |     0.219 |     0.261 |           | 
##              |     0.093 |     0.150 |           | 
## -------------|-----------|-----------|-----------|
## Column Total |       474 |       644 |      1118 | 
##              |     0.424 |     0.576 |           | 
## -------------|-----------|-----------|-----------|
## 
## 

Impact Of Marital Status On Education

# Grouped bar chart: row counts per marital status, split by education.
# The x-axis label previously rendered with the typo "Stauts".
ggplot(data = bank) +
  geom_bar(mapping = aes(x = marital, fill = education), position = "dodge") +
  labs(
    title = "The Impact Of Marital On Education",
    x = "Marital Status",
    y = "Count"
  )

Housing At Different Ages- Clean Data

# Side-by-side bar counts of each housing value at every age (cleaned data)
ggplot(bank, aes(x = age, fill = housing)) +
  geom_bar(position = "dodge") +
  labs(
    title = "Housing At Different Ages",
    x = "Age",
    y = "count"
  )

Housing At Different Ages- Unclean Data

# Same chart on the raw data, which covers the full age range before slicing
ggplot(bank_unclean, aes(x = age, fill = housing)) +
  geom_bar(position = "dodge") +
  labs(
    title = "Housing At Different Ages-unclean data",
    x = "Age",
    y = "count"
  )

Contact Info Based On Ages

# Side-by-side bar counts of each Contact_Info value at every age.
# geom_bar() plots row counts on the y axis, so the axis is labelled "count"
# (it was labelled "contact-info", which is the fill variable, not the y value).
ggplot(data = bank) +
  geom_bar(mapping = aes(x = age, fill = Contact_Info), position = "dodge") +
  labs(
    title = "Different contact info based on different Ages",
    x = "Age",
    y = "count"
  )

Top 6 jobs based on different ages (Box plot)

# Top 6 jobs ranked by mean age
bankrank <- bank %>%
  dplyr::select(education, age, job)

# Mean age per job, highest first; keep the six highest so the listing matches
# the "Top 6" heading (the previous code listed ten rows)
Mostjob <- aggregate(age ~ job, bankrank, mean)
top_jobs <- dplyr::slice_head(Mostjob[order(-Mostjob$age), ], n = 6)
top_jobs

# Box plot restricted to those six jobs, ordered by descending mean age
# (the previous plot drew every job despite its "Top 6" title)
top_job_rows <- bankrank[bankrank$job %in% top_jobs$job, ]
top_job_rows$job <- factor(top_job_rows$job, levels = top_jobs$job)
boxplot(
  age ~ job,
  data = top_job_rows,
  main = "Top 6 jobs based on different ages",
  xlab = "job", ylab = "age ", col = "purple"
)

Housing based on different ages (Box plot)

# Box plot of age for each housing value; the plot statistics are kept in box1
box1 <- boxplot(
  age ~ housing,
  data = bank,
  main = "box plot of Housing based on different ages",
  xlab = "housing", ylab = "age ", col = "light green"
)

# Box plot of age for each loan value; the plot statistics are kept in box2
box2 <- boxplot(
  age ~ loan,
  data = bank,
  main = "box plot of getting loan based on different ages",
  xlab = "loan", ylab = "age ", col = "light pink"
)

Age and Contact Info (BoxPlot)

# Horizontal box plots of age for each Contact_Info value, outlined in red.
# A fixed colour is a setting, so it goes to geom_boxplot() directly; inside
# aes() the string "red" is treated as a data value, which draws a default
# palette colour and adds a one-entry legend.
ggplot(bank, aes(x = age, y = Contact_Info)) +
  geom_boxplot(color = "red")

Job (Histogram Plot)

# Interactive histogram counting the rows for each job.
# The bar colour is set through marker$color: `color = ~"pink"` maps the
# string as a data value through a colour palette (the source of the
# RColorBrewer warnings), and `mode` is not a histogram attribute.
hist1 <- plot_ly(
  data = bank,
  x = ~job,
  type = "histogram",
  marker = list(color = "pink")
) %>%
  plotly::layout(title = "Job histogram plot", plot_bgcolor = "white")
hist1

Loan based on different ages and in different day (Histogram Plot)

# One histogram of log(day) per loan value, arranged in a single row with a
# shared x-axis and independent y-axes

bank %>%
  group_by(loan) %>%
  do(p = plot_ly(data = ., type = "histogram", x = ~log(day), name = ~age)) %>%
  subplot(shareY = FALSE, shareX = TRUE, nrows = 1) %>%
  plotly::layout(
    plot_bgcolor = "white",
    title = "Loan based on different ages and in different day"
  )

Contact based on different Contact_Info in different ages (Histogram Plot)

# One histogram of log(age) per age value, arranged in a single row with
# shared x- and y-axes; traces are named from Contact_Info

bank %>%
  group_by(age) %>%
  do(p = plot_ly(data = ., type = "histogram", x = ~log(age), name = ~Contact_Info)) %>%
  subplot(shareY = TRUE, shareX = TRUE, nrows = 1) %>%
  plotly::layout(
    plot_bgcolor = "white",
    title = "Different contact info based on different ages"
  )

Density axis and dual y-axis for the distribution of age (Histogram Plot)

# Histogram of log(age) with its kernel density estimate drawn against a
# second y-axis. Only age is plotted, so the density trace and the title are
# labelled accordingly (they previously mentioned education, which this
# chart never uses).
bankdens <- density(log(bank$age))

plot_ly(data = bank, x = ~log(age), type = "histogram", name = "age") %>%
  add_lines(
    x = bankdens$x, y = bankdens$y,
    yaxis = "y2", name = "density"
  ) %>%
  plotly::layout(
    title = "Distribution of log(age) with density overlay",
    plot_bgcolor = "white",
    yaxis2 = list(
      overlaying = "y",     # draw the second axis over the histogram's axis
      side = "right",       # put the density scale on the right-hand side
      rangemode = "tozero"  # start the density axis at 0
    )
  )

Violin plot for relation between duration and campaign

# Half violin (positive side) of log(campaign) against duration, with the
# mean line shown.
# I("pink") applies the literal colour; `color = ~"pink"` maps the string as
# a data value through a colour palette and triggers RColorBrewer warnings.
plot_ly(
  data = bank,
  x = ~duration,
  y = ~log(campaign),
  type = "violin",
  color = I("pink"),
  side = "positive",
  meanline = list(visible = TRUE)
) %>%
  plotly::layout(
    title = "relation between duration and campaign",
    plot_bgcolor = "white"
  )

Violin plot for relation between age and campaign

# Half violin (negative side) of log(campaign) against age, with the mean
# line shown.
# I("pink") applies the literal colour; `color = ~"pink"` maps the string as
# a data value through a colour palette and triggers RColorBrewer warnings.
plot_ly(
  data = bank,
  x = ~age,
  y = ~log(campaign),
  type = "violin",
  color = I("pink"),
  side = "negative",
  meanline = list(visible = TRUE)
) %>%
  plotly::layout(
    title = "Relation between age and campaign in violin plot",
    plot_bgcolor = "white"
  )

References

  1. Kabacoff, R. I. (2015). R in Action: Data analysis and graphics with R (2nd ed.). Manning Publications Co.

  2. Plotly Graphing Library (2021). Bar Charts in R. Plotly.com. https://plotly.com/r/barcharts/

  3. Quick-R by Datacamp (2017). Subsetting Data. Www.Statmethods.Net. https://www.statmethods.net/management/subset.html

  4. Stack Overflow (2016). R “Error: unexpected ‘}’ in "}"” [duplicate]. Stackoverflow.com. https://stackoverflow.com/questions/40291675/r-error-unexpected-in

Appendix

#install.packages("plyr")
#install.packages("dplyr")
#install.packages("tidyr")
#install.packages("tidyverse")
#install.packages("psych")
#install.packages("ggpubr")
#install.packages("ggplot2")
#install.packages("plotly")
#install.packages("moments")
#install.packages('gmodels')

library(plyr) 
library(dplyr)
library(tidyr)
library(tidyverse)
library(psych)
library(ggpubr)
library(ggplot2)
library(plotly)
library(moments)
library(gmodels)

# Read the raw bank data from the local CSV export and echo the data frame
bank_unclean <- read.csv(file = "~/Downloads/Archive (1)/bank.csv")
print(bank_unclean)

# Sort the raw data by age, oldest first
bank_sort <- bank_unclean[order(-bank_unclean$age), ]
head(bank_sort)

# Drop the "default" column by name rather than by position, so the step
# keeps removing the right column if the CSV column order changes
bank_drop <- dplyr::select(bank_sort, -default)
head(bank_drop)

# Rename column "contact" to "Contact_Info"
names(bank_drop)[names(bank_drop) == "contact"] <- "Contact_Info"
head(bank_drop)

# Keep only rows 5022..6139 of the age-sorted data (a middle slice), taken in
# reverse so the result runs from younger to older. Row indices beyond the
# end of a data frame silently produce all-NA rows, so check the size first.
first_kept_row <- 5022
last_kept_row <- 6139
stopifnot(nrow(bank_drop) >= last_kept_row)
bank <- bank_drop[last_kept_row:first_kept_row, ]

# Upper-case the three marital status labels with gsub(), one label per pass
marital_labels <- c(single = "SINGLE", married = "MARRIED", divorced = "DIVORCED")
bank$marital <- Reduce(
  function(values, from) gsub(from, marital_labels[[from]], values),
  names(marital_labels),
  as.character(bank$marital)
)
head(bank)

str(bank)
# Mean, min, max and standard deviation of balance within each age.
# across() replaces the superseded summarise_at(); .names = "{.fn}" keeps the
# result columns named Mean, Min, Max and STD. The dplyr:: prefixes keep the
# calls independent of the plyr/dplyr masking order.
bank %>%
  dplyr::group_by(age) %>%
  dplyr::summarise(
    dplyr::across(
      balance,
      list(Mean = mean, Min = min, Max = max, STD = sd),
      .names = "{.fn}"
    )
  )

# Mean, min, max and standard deviation of duration within each campaign value
bank %>%
  dplyr::group_by(campaign) %>%
  dplyr::summarise(
    dplyr::across(
      duration,
      list(Mean = mean, Min = min, Max = max, STD = sd),
      .names = "{.fn}"
    )
  )


# Rows whose balance is exactly 1
subset(bank, balance == 1)
# Rows with age from 20 to 40 inclusive. %in% tests membership for every row;
# `age == 20:40` recycles 20:40 along the column and compares position by
# position, which drops matching rows and raises a recycling warning.
subset(bank, age %in% 20:40)
# Rows whose campaign value is exactly 10
subset(bank, campaign == 10)

# Two-way frequency table: education (rows) by job (columns)
bank_table <- table(bank[["education"]], bank[["job"]])
print(bank_table)

# Counts of deposit (rows) at each age (columns), printed as a flat table
bank_Ftable2 <- table(bank[["deposit"]], bank[["age"]])
print(ftable(bank_Ftable2))

# Counts of Contact_Info (rows) by marital status (columns), printed as a flat table
bank_Ftable3 <- table(bank[["Contact_Info"]], bank[["marital"]])
print(ftable(bank_Ftable3))

# Cross-tabulate education (rows) against job (columns).
# dnn labels the dimensions in the same order as x and y, so it must be
# c("Education", "Job"); the reversed labels put "Job" on the education rows.
banl_ct1 <- CrossTable(
  x = bank$education,
  y = bank$job,
  dnn = c("Education", "Job")
)

# Cross-tabulate age (rows) against housing (columns)
banl_ct2 <- CrossTable(
  x = bank$age,
  y = bank$housing,
  dnn = c("Age", "Housing")
)

# Grouped bar chart: row counts per marital status, split by education.
# The x-axis label previously rendered with the typo "Stauts".
ggplot(data = bank) +
  geom_bar(mapping = aes(x = marital, fill = education), position = "dodge") +
  labs(
    title = "The Impact Of Marital On Education",
    x = "Marital Status",
    y = "Count"
  )

# Side-by-side bar counts of each housing value at every age (cleaned data)
ggplot(data = bank) +
  geom_bar(mapping = aes(x = age, fill = housing), position = "dodge") +
  labs(title = "Housing At Different Ages", x = "Age", y = "count")

# Same chart on the raw data, which covers the full age range before slicing
ggplot(data = bank_unclean) +
  geom_bar(mapping = aes(x = age, fill = housing), position = "dodge") +
  labs(title = "Housing At Different Ages-unclean data", x = "Age", y = "count")

# Side-by-side bar counts of each Contact_Info value at every age.
# geom_bar() plots row counts on the y axis, so the axis is labelled "count"
# (it was labelled "contact-info", which is the fill variable, not the y value).
ggplot(data = bank) +
  geom_bar(mapping = aes(x = age, fill = Contact_Info), position = "dodge") +
  labs(
    title = "Different contact info based on different Ages",
    x = "Age",
    y = "count"
  )

# Top 6 jobs ranked by mean age
bankrank <- bank %>%
  dplyr::select(education, age, job)

# Mean age per job, highest first; keep the six highest so the listing matches
# the "Top 6" heading (the previous code listed ten rows)
Mostjob <- aggregate(age ~ job, bankrank, mean)
top_jobs <- dplyr::slice_head(Mostjob[order(-Mostjob$age), ], n = 6)
top_jobs

# Box plot restricted to those six jobs, ordered by descending mean age
# (the previous plot drew every job despite its "Top 6" title)
top_job_rows <- bankrank[bankrank$job %in% top_jobs$job, ]
top_job_rows$job <- factor(top_job_rows$job, levels = top_jobs$job)
boxplot(
  age ~ job,
  data = top_job_rows,
  main = "Top 6 jobs based on different ages",
  xlab = "job", ylab = "age ", col = "purple"
)

# Box plot of age for each housing value; the plot statistics are kept in box1
box1 <- boxplot(
  age ~ housing,
  data = bank,
  main = "box plot of Housing based on different ages",
  xlab = "housing", ylab = "age ", col = "light green"
)

# Box plot of age for each loan value; the plot statistics are kept in box2
box2 <- boxplot(
  age ~ loan,
  data = bank,
  main = "box plot of getting loan based on different ages",
  xlab = "loan", ylab = "age ", col = "light pink"
)

# Horizontal box plots of age for each Contact_Info value, outlined in red.
# A fixed colour is a setting, so it goes to geom_boxplot() directly; inside
# aes() the string "red" is treated as a data value, which draws a default
# palette colour and adds a one-entry legend.
ggplot(bank, aes(x = age, y = Contact_Info)) +
  geom_boxplot(color = "red")

# Interactive histogram counting the rows for each job.
# The bar colour is set through marker$color: `color = ~"pink"` maps the
# string as a data value through a colour palette (which raises RColorBrewer
# warnings), and `mode` is not a histogram attribute.
hist1 <- plot_ly(
  data = bank,
  x = ~job,
  type = "histogram",
  marker = list(color = "pink")
) %>%
  plotly::layout(title = "Job histogram plot", plot_bgcolor = "white")
hist1

# One histogram of log(day) per loan value, arranged in a single row with a
# shared x-axis and independent y-axes

bank %>%
  group_by(loan) %>%
  do(p = plot_ly(data = ., type = "histogram", x = ~log(day), name = ~age)) %>%
  subplot(shareY = FALSE, shareX = TRUE, nrows = 1) %>%
  plotly::layout(
    plot_bgcolor = "white",
    title = "Loan based on different ages and in different day"
  )


# One histogram of log(age) per age value, arranged in a single row with
# shared x- and y-axes; traces are named from Contact_Info

bank %>%
  group_by(age) %>%
  do(p = plot_ly(data = ., type = "histogram", x = ~log(age), name = ~Contact_Info)) %>%
  subplot(shareY = TRUE, shareX = TRUE, nrows = 1) %>%
  plotly::layout(
    plot_bgcolor = "white",
    title = "Different contact info based on different ages"
  )


# Histogram of log(age) with its kernel density estimate drawn against a
# second y-axis. Only age is plotted, so the density trace and the title are
# labelled accordingly (they previously mentioned education, which this
# chart never uses).
bankdens <- density(log(bank$age))

plot_ly(data = bank, x = ~log(age), type = "histogram", name = "age") %>%
  add_lines(
    x = bankdens$x, y = bankdens$y,
    yaxis = "y2", name = "density"
  ) %>%
  plotly::layout(
    title = "Distribution of log(age) with density overlay",
    plot_bgcolor = "white",
    yaxis2 = list(
      overlaying = "y",     # draw the second axis over the histogram's axis
      side = "right",       # put the density scale on the right-hand side
      rangemode = "tozero"  # start the density axis at 0
    )
  )


# Half violin (positive side) of log(campaign) against duration, with the
# mean line shown.
# I("pink") applies the literal colour; `color = ~"pink"` maps the string as
# a data value through a colour palette and triggers RColorBrewer warnings.
plot_ly(
  data = bank,
  x = ~duration,
  y = ~log(campaign),
  type = "violin",
  color = I("pink"),
  side = "positive",
  meanline = list(visible = TRUE)
) %>%
  plotly::layout(
    title = "relation between duration and campaign",
    plot_bgcolor = "white"
  )

# Half violin (negative side) of log(campaign) against age, with the mean
# line shown; the colour is set the same way as above.
plot_ly(
  data = bank,
  x = ~age,
  y = ~log(campaign),
  type = "violin",
  color = I("pink"),
  side = "negative",
  meanline = list(visible = TRUE)
) %>%
  plotly::layout(
    title = "Relation between age and campaign in violin plot",
    plot_bgcolor = "white"
  )


##